package com.farm.file.util;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Static helpers for locating file ids referenced from HTML content.
 *
 * <p>A "file id" here is the value that follows one of the known download
 * markers (currently only {@code download/Pubfile.do?id=}) inside a URL.
 */
public class WhtmFileUtils {

	/** URL fragments that are immediately followed by a file id. */
	private static final String[] DOWNLOAD_URL_MARKERS = { "download/Pubfile.do?id=" };

	/**
	 * Extracts the file id from a download URL.
	 *
	 * <p>The id starts right after the first occurrence of a known download
	 * marker and ends at the next {@code &} (or at the end of the string when
	 * no further parameter follows). An {@code &} that appears before the
	 * marker is ignored.
	 *
	 * @param urlStr the URL to inspect; may be {@code null}
	 * @return the id (possibly empty when nothing follows the marker), or
	 *         {@code null} when {@code urlStr} is {@code null} or contains no
	 *         known download marker
	 */
	public static String getFileIdFromImgUrl(String urlStr) {
		if (urlStr == null) {
			return null;
		}
		for (String marker : DOWNLOAD_URL_MARKERS) {
			int markerAt = urlStr.indexOf(marker);
			if (markerAt < 0) {
				continue;
			}
			int idStart = markerAt + marker.length();
			// Only a '&' located after the id start can terminate the id; searching
			// the whole string (as lastIndexOf would) picks up unrelated separators.
			int idEnd = urlStr.indexOf('&', idStart);
			return idEnd < 0 ? urlStr.substring(idStart) : urlStr.substring(idStart, idEnd);
		}
		return null;
	}

	/**
	 * Collects the distinct file ids referenced by an HTML fragment.
	 *
	 * <p>The {@code src} attribute of {@code img} and {@code embed} elements
	 * and the {@code href} attribute of {@code a} elements are inspected with
	 * {@link #getFileIdFromImgUrl(String)}.
	 *
	 * @param html the HTML text to parse; may be {@code null}
	 * @return a new mutable list of the distinct ids found, in no particular
	 *         order; empty when {@code html} is {@code null} or references no
	 *         file
	 */
	public static List<String> getFilesIdFromHtml(String html) {
		if (html == null) {
			return new ArrayList<String>();
		}
		Document doc = Jsoup.parse(html);
		// A set removes ids that are referenced more than once.
		Set<String> ids = new HashSet<>();
		// Images
		collectFileIds(doc, "img", "src", ids);
		// Multimedia
		collectFileIds(doc, "embed", "src", ids);
		// Attachments
		collectFileIds(doc, "a", "href", ids);
		return new ArrayList<String>(ids);
	}

	/** Adds to {@code sink} every file id found in {@code attribute} of the {@code tag} elements of {@code doc}. */
	private static void collectFileIds(Document doc, String tag, String attribute, Set<String> sink) {
		for (Element node : doc.getElementsByTag(tag)) {
			String id = getFileIdFromImgUrl(node.attr(attribute));
			if (id != null) {
				sink.add(id);
			}
		}
	}

	/**
	 * Returns the elements of {@code ids1} that are not contained in
	 * {@code ids2} (set difference {@code ids1 \ ids2}), compared with
	 * {@code equals}. Neither argument is modified.
	 *
	 * @param ids1 the ids to filter; {@code null} is treated as empty
	 * @param ids2 the ids to exclude; {@code null} is treated as empty
	 * @return a new mutable set holding the ids present only in {@code ids1}
	 */
	public static Set<String> removeStringSet(Set<String> ids1, Set<String> ids2) {
		Set<String> ids = new HashSet<>();
		if (ids1 == null) {
			return ids;
		}
		ids.addAll(ids1);
		if (ids2 != null) {
			// Copy into a HashSet so matching always uses equals/hashCode, whatever
			// Set implementation (sorted, null-hostile, ...) the caller passed in.
			ids.removeAll(new HashSet<>(ids2));
		}
		return ids;
	}
}
